##########################################
########## Tone Validation ################

# Data In: "tone_validation.csv"
          
# Data Out: 
# Figures A4, A5, A6, A7

###########################################

#Load Packages
library(readr)
library(tidyverse)
library(lubridate)
library(zoo)


# Plotting setup: put a very large penalty on scientific notation so that
# numbers are printed in fixed notation
options(scipen = 999999)

# Set working directory to replication folder (Refugee_Info_Replication)

# Open code file from "Refugee_Info_Replication/code/post_type_analysis.R"

# Obtain the full path of the current script in RStudio.
# rstudioapi reports an empty string (not NULL) for an unsaved document or the
# console, and the call itself fails outside RStudio, so fall back to "" there.
in_rstudio <- requireNamespace("rstudioapi", quietly = TRUE) &&
  rstudioapi::isAvailable()
script_path <- if (in_rstudio) {
  rstudioapi::getActiveDocumentContext()$path
} else {
  ""
}

# Only change directory when a real (saved) script path is available
if (!is.null(script_path) && nzchar(script_path)) {
  # Go up two levels: from the script file to its folder, then to that
  # folder's parent (the replication folder when the script sits in code/)
  parent_directory <- dirname(dirname(script_path))

  # Set the working directory to the parent directory
  setwd(parent_directory)
} else {
  cat("Script path is not set. Ensure your script is saved and you are running RStudio.")
}
# Check Working Directory
getwd()

# Load the validation data set (path is relative to the working directory)
data_scored <- read_csv(file = "data/tone_validation.csv")

#############
# Figure A4 #
#############

# Add 0/1 indicator columns for the two tone labels of interest
data_scored <- data_scored %>%
  mutate(
    encouraging = ifelse(cleaned_score == "encouraging", 1, 0),
    discouraging = ifelse(cleaned_score == "discouraging", 1, 0)
  )

# Per post type: counts and shares of encouraging / discouraging posts, the
# encouraging-minus-discouraging gaps, and removal of rows containing NA
gpt_agg <- data_scored %>%
  group_by(post_type) %>%
  summarise(
    encouraging_total = sum(encouraging),
    discouraging_total = sum(discouraging),
    total_post_type = n(),
    encouraging_prop = sum(encouraging / n()),
    discouraging_prop = sum(discouraging / n())
  ) %>%
  mutate(
    difference = encouraging_total - discouraging_total,
    difference_prop = encouraging_prop - discouraging_prop
  ) %>%
  na.omit()

# Bar chart of the gap in proportions, one bar per post type
ggplot(
  gpt_agg,
  aes(x = post_type, y = difference_prop, fill = post_type)
) +
  geom_bar(stat = "identity", position = "dodge", color = "black") +
  theme_minimal(base_size = 22) +
  labs(x = "Source", y = "Prop.Encouraging - Prop. Discouraging Posts") +
  scale_fill_grey() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank()
  )
ggsave("plots/Figure_A4.pdf", width = 11, height = 7)


#############
# Figure A5 #
#############

# Generate a year-month variable: as.yearmon() discards the day of the month
# and as.Date() converts the result back to a Date (first day of that month)
data_scored <- data_scored %>%
  mutate(yearmon = as.Date(as.yearmon(date)))

# Per post type and month: counts and shares of encouraging / discouraging
# posts. Uses the `encouraging` / `discouraging` dummy columns on data_scored.
gpt_agg_date <- data_scored %>%
  group_by(post_type, yearmon) %>%
  summarise(
    encouraging_total = sum(encouraging),
    discouraging_total = sum(discouraging),
    total_post_type = n(),
    encouraging_prop = sum(encouraging / n()),
    discouraging_prop = sum(discouraging / n())
  ) %>%
  # summarise() only removes the last grouping level (yearmon); drop the
  # remaining post_type grouping so later steps work on a plain tibble
  ungroup() %>%
  mutate(
    difference = encouraging_total - discouraging_total,
    difference_prop = encouraging_prop - discouraging_prop
  ) %>%
  # Remove any post type / month row that contains an NA
  na.omit()

# Keep every post type except "news" for this figure
official_unofficial <- subset(gpt_agg_date, post_type != "news")

# Monthly gap in proportions over time: points plus a LOESS trend per post type
official_unofficial %>%
  ggplot() +
  aes(
    x = yearmon, y = difference_prop,
    group = post_type, color = post_type, fill = post_type
  ) +
  geom_smooth(method = loess, span = .2, se = FALSE) +
  geom_point() +
  labs(
    y = "Monthly Proportion of Posts \n (Encouraging-Discouraging)",
    x = "Date"
  ) +
  theme_minimal(base_size = 22) +
  scale_color_grey()
ggsave("plots/Figure_A5.pdf", width = 11, height = 7)

#############
# Figure A6 #
#############

# Average engagement per post by source (post_type) and sentiment
# (cleaned_score)
gpt_agg_engagement <- data_scored %>%
  group_by(post_type, cleaned_score) %>%
  # mean() has no na.rm here, so a group with any missing engagement_fb gets
  # an NA mean and is removed by na.omit() below
  summarise(mean_engagement = mean(engagement_fb)) %>%
  # summarise() only removes the last grouping level (cleaned_score); drop the
  # remaining post_type grouping so the result is a plain tibble
  ungroup() %>%
  na.omit() %>%
  # Posts scored "neither" are not shown in this figure
  subset(cleaned_score != "neither")

# Dodged bars: one bar per sentiment within each source
ggplot(
  gpt_agg_engagement,
  aes(fill = cleaned_score, y = mean_engagement, x = post_type)
) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  theme_minimal(base_size = 22) +
  labs(y = "Average Engagement per Post \n", x = "Source") +
  scale_fill_grey() +
  theme(legend.title = element_blank())
ggsave("plots/Figure_A6.pdf", width = 11, height = 7)

#############
# Figure A7 #
#############

# Restrict to posts whose post_type is "official"
official_only <- data_scored %>%
  filter(post_type == "official")

# Counts, shares and encouraging-minus-discouraging gaps by the post_2016
# flag, which is then recoded into a readable label for the plot
agg2016 <- official_only %>%
  group_by(post_2016) %>%
  summarise(
    encouraging_total = sum(encouraging),
    discouraging_total = sum(discouraging),
    total_source = n(),
    encouraging_prop = sum(encouraging / n()),
    discouraging_prop = sum(discouraging / n())
  ) %>%
  mutate(
    difference = encouraging_total - discouraging_total,
    difference_prop = encouraging_prop - discouraging_prop,
    post_2016 = ifelse(post_2016 == 1, "post-2016 source", "pre-2016 source")
  )

# One bar per post_2016 label showing the gap in proportions
ggplot(
  agg2016,
  aes(x = post_2016, y = difference_prop, fill = post_2016)
) +
  geom_bar(stat = "identity", position = "dodge", color = "black") +
  theme_minimal(base_size = 22) +
  labs(x = "Source", y = "Proportion of Posts \n (Encouraging-Discouraging)") +
  scale_fill_grey() +
  theme(legend.title = element_blank())
ggsave("plots/Figure_A7.pdf", width = 11, height = 7)





